libraries here
r library(readxl) library(classdata) library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages -------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3 ## v tibble 2.1.3 v dplyr 0.8.3 ## v tidyr 1.0.0 v stringr 1.4.0 ## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------- tidyverse_conflicts() -- ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag()
r library(stringr) library(ggplot2) library(plotly)
## ## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2': ## ## last_plot
## The following object is masked from 'package:stats': ## ## filter
## The following object is masked from 'package:graphics': ## ## layout

import data here

# Raw inputs: the fight-song table, the IPEDS institution sheet, and the
# hand-made lookup table that is later used to pick rows out of ipeds.
fight_songs <- read.csv("fight-songs.csv")
ipeds <- read_excel("IPEDS_data.xlsx", sheet = "Data")
lookup_table <- read.csv("lookup_table.csv")
cleaning data (VERSION 1)
1) loop through all values ipeds and fight_songs
2) add if similar name, otherwise print which names did not work
```r
# Version 1: pair every fight-song school with an IPEDS institution by name.
# pmatch() accepts an exact match or a unique partial (prefix) match, so a
# short school name can land on a different institution that merely starts
# with the same words.
cleaned_ipeds <- data.frame()
i <- 1 # not used by this loop; retained for later code that expects it
for (name in fight_songs$school) {
  # Look the name up once and reuse the row position.
  picked_row <- pmatch(name, ipeds$Name, duplicates.ok = FALSE)
  if (!is.na(picked_row)) {
    foo <- paste("works: ", name, ipeds$Name[picked_row])
    print(foo)
    cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[picked_row, ])
  } else {
    foo <- paste("doesn't work:", name)
    # print(foo)
  }
}
```
## [1] "works: Baylor Baylor University" ## [1] "works: Iowa State Iowa State University" ## [1] "works: Kansas State Kansas State University" ## [1] "works: Oklahoma State Oklahoma State University-Main Campus" ## [1] "works: Texas Tech Texas Tech University" ## [1] "works: Maryland Maryland University of Integrative Health" ## [1] "works: Michigan State Michigan State University" ## [1] "works: Nebraska Nebraska Wesleyan University" ## [1] "works: Wisconsin Wisconsin Lutheran College" ## [1] "works: Arizona State Arizona State University-Tempe" ## [1] "works: Oregon State Oregon State University" ## [1] "works: Stanford Stanford University" ## [1] "works: Washington State Washington State University" ## [1] "works: Mississippi State Mississippi State University" ## [1] "works: South Carolina South Carolina State University" ## [1] "works: Vanderbilt Vanderbilt University" ## [1] "works: Boston College Boston College" ## [1] "works: Clemson Clemson University" ## [1] "works: Duke Duke University" ## [1] "works: Florida State Florida State University" ## [1] "works: Miami Miami University-Oxford" ## [1] "works: North Carolina State North Carolina State University at Raleigh" ## [1] "works: Pitt Pittsburg State University" ## [1] "works: Syracuse Syracuse University" ## [1] "works: Wake Forest Wake Forest University"
# Append the ipeds rows that lookup_table points at: the values in its
# `Column` field are used as row positions in ipeds.
# NOTE(review): presumably these are the schools the name matching could not
# resolve - confirm against lookup_table.csv.
# Subsetting once appends the same rows as a row-by-row loop and adds nothing
# when lookup_table is empty.
lookup_rows <- lookup_table$Column
cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[lookup_rows, ])

Cleaning the data (VERSION 2!): added a new column for the IDs in ipeds, which allows for better, more thorough cleaning. Some names weren't matched correctly by pmatch.

# Version 2: match schools on the institution ID instead of on names.
# The ID column of fight_songs is still called ID.number here, while ipeds
# calls it "ID number". Coerce it to double and give it the ipeds name first,
# so the lookup below and the later joins all use the same key.
fight_songs$ID.number <- as.double(fight_songs$ID.number)
names(fight_songs)[names(fight_songs) == "ID.number"] <- "ID number"

# Row position in ipeds for every fight song (NA when the ID is not found).
picked_rows <- match(fight_songs[["ID number"]], ipeds[["ID number"]])
unmatched <- is.na(picked_rows)
for (school in as.character(fight_songs$school[unmatched])) {
  print(paste("ERROR:", school))
}

cleaned_ipeds <- rbind(cleaned_ipeds, ipeds[picked_rows[!unmatched], ])
# An institution that was already in cleaned_ipeds would otherwise appear
# twice and duplicate rows when the tables are joined on the ID.
cleaned_ipeds <- cleaned_ipeds[!duplicated(cleaned_ipeds[["ID number"]]), ]
cleaned_ipeds

fight_songs
joining the data
```r
# Drop any `year` column from the IPEDS side before joining.
# NOTE(review): presumably so it cannot clash with fight_songs$year - confirm.
cleaned_ipeds$year <- NULL

# One row per IPEDS institution, with its fight-song data where available.
# The key is spelled out so that no other shared column can silently become
# part of the join.
join_data <- left_join(cleaned_ipeds, fight_songs, by = "ID number")
```
## Joining, by = "ID number"

Jessie's part

# One row per fight song, with the IPEDS columns attached where the ID is
# found in cleaned_ipeds. The key is explicit so that no other shared column
# can silently become part of the join.
join_data2 <- left_join(fight_songs, cleaned_ipeds, by = "ID number")
## Joining, by = "ID number"
# List every column of the joined table.
names(join_data2)
##   [1] "school"                                                                                
##   [2] "conference"                                                                            
##   [3] "song_name"                                                                             
##   [4] "writers"                                                                               
##   [5] "year"                                                                                  
##   [6] "student_writer"                                                                        
##   [7] "official_song"                                                                         
##   [8] "contest"                                                                               
##   [9] "bpm"                                                                                   
##  [10] "sec_duration"                                                                          
##  [11] "fight"                                                                                 
##  [12] "number_fights"                                                                         
##  [13] "victory"                                                                               
##  [14] "win_won"                                                                               
##  [15] "victory_win_won"                                                                       
##  [16] "rah"                                                                                   
##  [17] "nonsense"                                                                              
##  [18] "colors"                                                                                
##  [19] "men"                                                                                   
##  [20] "opponents"                                                                             
##  [21] "spelling"                                                                              
##  [22] "trope_count"                                                                           
##  [23] "spotify_id"                                                                            
##  [24] "X2019_FB_Wins"                                                                         
##  [25] "X2019_FB_Losses"                                                                       
##  [26] "Niche_Athletic_Rank"                                                                   
##  [27] "Niche_Party_Rank"                                                                      
##  [28] "ID number"                                                                             
##  [29] "Name"                                                                                  
##  [30] "ZIP code"                                                                              
##  [31] "Highest degree offered"                                                                
##  [32] "County name"                                                                           
##  [33] "Longitude location of institution"                                                     
##  [34] "Latitude location of institution"                                                      
##  [35] "Religious affiliation"                                                                 
##  [36] "Offers Less than one year certificate"                                                 
##  [37] "Offers One but less than two years certificate"                                        
##  [38] "Offers Associate's degree"                                                             
##  [39] "Offers Two but less than 4 years certificate"                                          
##  [40] "Offers Bachelor's degree"                                                              
##  [41] "Offers Postbaccalaureate certificate"                                                  
##  [42] "Offers Master's degree"                                                                
##  [43] "Offers Post-master's certificate"                                                      
##  [44] "Offers Doctor's degree - research/scholarship"                                         
##  [45] "Offers Doctor's degree - professional practice"                                        
##  [46] "Offers Doctor's degree - other"                                                        
##  [47] "Offers Other degree"                                                                   
##  [48] "Applicants total"                                                                      
##  [49] "Admissions total"                                                                      
##  [50] "Enrolled total"                                                                        
##  [51] "Percent of freshmen submitting SAT scores"                                             
##  [52] "Percent of freshmen submitting ACT scores"                                             
##  [53] "SAT Critical Reading 25th percentile score"                                            
##  [54] "SAT Critical Reading 75th percentile score"                                            
##  [55] "SAT Math 25th percentile score"                                                        
##  [56] "SAT Math 75th percentile score"                                                        
##  [57] "SAT Writing 25th percentile score"                                                     
##  [58] "SAT Writing 75th percentile score"                                                     
##  [59] "ACT Composite 25th percentile score"                                                   
##  [60] "ACT Composite 75th percentile score"                                                   
##  [61] "Estimated enrollment, total"                                                           
##  [62] "Estimated enrollment, full time"                                                       
##  [63] "Estimated enrollment, part time"                                                       
##  [64] "Estimated undergraduate enrollment, total"                                             
##  [65] "Estimated undergraduate enrollment, full time"                                         
##  [66] "Estimated undergraduate enrollment, part time"                                         
##  [67] "Estimated freshman undergraduate enrollment, total"                                    
##  [68] "Estimated freshman enrollment, full time"                                              
##  [69] "Estimated freshman enrollment, part time"                                              
##  [70] "Estimated graduate enrollment, total"                                                  
##  [71] "Estimated graduate enrollment, full time"                                              
##  [72] "Estimated graduate enrollment, part time"                                              
##  [73] "Associate's degrees awarded"                                                           
##  [74] "Bachelor's degrees awarded"                                                            
##  [75] "Master's degrees awarded"                                                              
##  [76] "Doctor's degrese - research/scholarship awarded"                                       
##  [77] "Doctor's degrees - professional practice awarded"                                      
##  [78] "Doctor's degrees - other awarded"                                                      
##  [79] "Certificates of less than 1-year awarded"                                              
##  [80] "Certificates of 1 but less than 2-years awarded"                                       
##  [81] "Certificates of 2 but less than 4-years awarded"                                       
##  [82] "Postbaccalaureate certificates awarded"                                                
##  [83] "Post-master's certificates awarded"                                                    
##  [84] "Number of students receiving an Associate's degree"                                    
##  [85] "Number of students receiving a Bachelor's degree"                                      
##  [86] "Number of students receiving a Master's degree"                                        
##  [87] "Number of students receiving a Doctor's degree"                                        
##  [88] "Number of students receiving a certificate of less than 1-year"                        
##  [89] "Number of students receiving a certificate of 1 but less than 4-years"                 
##  [90] "Number of students receiving a Postbaccalaureate or Post-master's certificate"         
##  [91] "Percent admitted - total"                                                              
##  [92] "Admissions yield - total"                                                              
##  [93] "Tuition and fees, 2010-11"                                                             
##  [94] "Tuition and fees, 2011-12"                                                             
##  [95] "Tuition and fees, 2012-13"                                                             
##  [96] "Tuition and fees, 2013-14"                                                             
##  [97] "Total price for in-state students living on campus 2013-14"                            
##  [98] "Total price for out-of-state students living on campus 2013-14"                        
##  [99] "State abbreviation"                                                                    
## [100] "FIPS state code"                                                                       
## [101] "Geographic region"                                                                     
## [102] "Sector of institution"                                                                 
## [103] "Level of institution"                                                                  
## [104] "Control of institution"                                                                
## [105] "Historically Black College or University"                                              
## [106] "Tribal college"                                                                        
## [107] "Degree of urbanization (Urban-centric locale)"                                         
## [108] "Carnegie Classification 2010: Basic"                                                   
## [109] "Total  enrollment"                                                                     
## [110] "Full-time enrollment"                                                                  
## [111] "Part-time enrollment"                                                                  
## [112] "Undergraduate enrollment"                                                              
## [113] "Graduate enrollment"                                                                   
## [114] "Full-time undergraduate enrollment"                                                    
## [115] "Part-time undergraduate enrollment"                                                    
## [116] "Percent of total enrollment that are American Indian or Alaska Native"                 
## [117] "Percent of total enrollment that are Asian"                                            
## [118] "Percent of total enrollment that are Black or African American"                        
## [119] "Percent of total enrollment that are Hispanic/Latino"                                  
## [120] "Percent of total enrollment that are Native Hawaiian or Other Pacific Islander"        
## [121] "Percent of total enrollment that are White"                                            
## [122] "Percent of total enrollment that are two or more races"                                
## [123] "Percent of total enrollment that are Race/ethnicity unknown"                           
## [124] "Percent of total enrollment that are Nonresident Alien"                                
## [125] "Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander"           
## [126] "Percent of total enrollment that are women"                                            
## [127] "Percent of undergraduate enrollment that are American Indian or Alaska Native"         
## [128] "Percent of undergraduate enrollment that are Asian"                                    
## [129] "Percent of undergraduate enrollment that are Black or African American"                
## [130] "Percent of undergraduate enrollment that are Hispanic/Latino"                          
## [131] "Percent of undergraduate enrollment that are Native Hawaiian or Other Pacific Islander"
## [132] "Percent of undergraduate enrollment that are White"                                    
## [133] "Percent of undergraduate enrollment that are two or more races"                        
## [134] "Percent of undergraduate enrollment that are Race/ethnicity unknown"                   
## [135] "Percent of undergraduate enrollment that are Nonresident Alien"                        
## [136] "Percent of undergraduate enrollment that are Asian/Native Hawaiian/Pacific Islander"   
## [137] "Percent of undergraduate enrollment that are women"                                    
## [138] "Percent of graduate enrollment that are American Indian or Alaska Native"              
## [139] "Percent of graduate enrollment that are Asian"                                         
## [140] "Percent of graduate enrollment that are Black or African American"                     
## [141] "Percent of graduate enrollment that are Hispanic/Latino"                               
## [142] "Percent of graduate enrollment that are Native Hawaiian or Other Pacific Islander"     
## [143] "Percent of graduate enrollment that are White"                                         
## [144] "Percent of graduate enrollment that are two or more races"                             
## [145] "Percent of graduate enrollment that are Race/ethnicity unknown"                        
## [146] "Percent of graduate enrollment that are Nonresident Alien"                             
## [147] "Percent of graduate enrollment that are Asian/Native Hawaiian/Pacific Islander"        
## [148] "Percent of graduate enrollment that are women"                                         
## [149] "Number of first-time undergraduates - in-state"                                        
## [150] "Percent of first-time undergraduates - in-state"                                       
## [151] "Number of first-time undergraduates - out-of-state"                                    
## [152] "Percent of first-time undergraduates - out-of-state"                                   
## [153] "Number of first-time undergraduates - foreign countries"                               
## [154] "Percent of first-time undergraduates - foreign countries"                              
## [155] "Number of first-time undergraduates - residence unknown"                               
## [156] "Percent of first-time undergraduates - residence unknown"                              
## [157] "Graduation rate - Bachelor degree within 4 years, total"                               
## [158] "Graduation rate - Bachelor degree within 5 years, total"                               
## [159] "Graduation rate - Bachelor degree within 6 years, total"                               
## [160] "Percent of freshmen receiving any financial aid"                                       
## [161] "Percent of freshmen receiving federal, state, local or institutional grant aid"        
## [162] "Percent of freshmen  receiving federal grant aid"                                      
## [163] "Percent of freshmen receiving Pell grants"                                             
## [164] "Percent of freshmen receiving other federal grant aid"                                 
## [165] "Percent of freshmen receiving state/local grant aid"                                   
## [166] "Percent of freshmen receiving institutional grant aid"                                 
## [167] "Percent of freshmen receiving student loan aid"                                        
## [168] "Percent of freshmen receiving federal student loans"                                   
## [169] "Percent of freshmen receiving other loan aid"                                          
## [170] "Endowment assets (year end) per FTE enrollment (GASB)"                                 
## [171] "Endowment assets (year end) per FTE enrollment (FASB)"
# Song length against tempo, one point per song, coloured by conference.
# The crossing lines mark the median tempo and median length. The `text`
# aesthetic is not drawn by ggplot; it only feeds the plotly tooltip.
speed_plot <- join_data2 %>%
  mutate(conference = as.factor(conference)) %>%
  ggplot(aes(
    x = sec_duration,
    y = bpm,
    color = conference,
    text = paste0(
      "<b>Song Name: </b>", song_name, "<br>",
      "<b>School: </b>", school
    )
  )) +
  geom_point() +
  # na.rm keeps the reference lines even if a measurement is missing.
  geom_hline(yintercept = median(join_data2$bpm, na.rm = TRUE)) +
  geom_vline(xintercept = median(join_data2$sec_duration, na.rm = TRUE)) +
  xlab("Song Length (Seconds)") +
  ylab("Song Speed (bpm)") +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank())

# Interactive version: show only the custom tooltip and drop the legend.
speed_plotly <- ggplotly(speed_plot, tooltip = "text") %>%
  hide_legend()

speed_plotly
# Classify every song by tempo (split at 140 bpm) and length (split at 67 s).
# A value exactly on a split counts as the "Fast" / "Long" side, so a song is
# never labelled "Fast" with a tempo below 140 or "Long" with a length below
# 67. Songs missing either measurement stay NA.
join_data2 <- join_data2 %>%
  mutate(speed_cat = case_when(
    is.na(bpm) | is.na(sec_duration) ~ NA_character_,
    bpm < 140 & sec_duration < 67 ~ "Short & Slow",
    bpm < 140 ~ "Long & Slow",
    sec_duration < 67 ~ "Short & Fast",
    TRUE ~ "Long & Fast"
  ))

# State outlines used as the base layer of the maps.
map <- map_data("state")

# Convert `year` to a number by way of its text form, so that the printed
# year is what gets parsed; entries that are not numbers become NA.
year_text <- as.character(join_data2[["year"]])
join_data2[["year"]] <- as.numeric(year_text)
## Warning: NAs introduced by coercion
# Songs written before 1900 (rows without a usable year are dropped).
filter1900 <- join_data2 %>%
  filter(year < 1900)

# Same level order as the later period maps use, so that a given
# student_writer value gets the same Set1 colour on every map.
filter1900$student_writer <- factor(filter1900$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with one point and one year label per song, coloured by whether
# a student wrote it. Axes, grid and legend are hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filter1900,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter1900,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap

# Songs written from 1900 up to (not including) 1910 get a year label ...
filter1905 <- join_data2 %>%
  filter(year >= 1900, year < 1910)

# ... while every song written before 1910 is drawn as a point.
filter1905T <- join_data2 %>%
  filter(year < 1910)

# Fixed level order so the Set1 colours do not depend on which values occur.
filter1905T$student_writer <- factor(filter1905T$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filter1905T,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter1905,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).

# Songs written from 1910 up to (not including) 1915 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1910, year < 1915)

# ... while every song written before 1915 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1915)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 1 rows containing missing values (geom_point).

# Songs written from 1915 up to (not including) 1920 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1915, year < 1920)

# ... while every song written before 1920 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1920)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap

## Warning: Removed 1 rows containing missing values (geom_point).

# Songs written from 1920 up to (not including) 1930 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1920, year < 1930)

# ... while every song written before 1930 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1930)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_label).

# Songs written from 1930 up to (not including) 1940 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1930, year < 1940)

# ... while every song written before 1940 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1940)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).

# Songs written from 1940 up to (not including) 1950 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1940, year < 1950)

# ... while every song written before 1950 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1950)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 4 rows containing missing values (geom_point).

# Songs written from 1950 up to (not including) 1960 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1950, year < 1960)

# ... while every song written before 1960 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1960)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).

# Songs written from 1960 up to (not including) 1970 get a year label ...
filter <- join_data2 %>%
  filter(year >= 1960, year < 1970)

# ... while every song written before 1970 is drawn as a point.
filterT <- join_data2 %>%
  filter(year < 1970)

# Fixed level order so the Set1 colours do not depend on which values occur.
filterT$student_writer <- factor(filterT$student_writer, levels = c("Yes", "No", "Unknown"))

# US outline with the points and labels on top; axes, grid and legend hidden.
map %>%
  ggplot(aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  geom_label(
    data = filter,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      label = year,
      color = student_writer
    ),
    hjust = 0,
    nudge_x = 0.3
  ) +
  scale_colour_brewer(palette = "Set1") +
  theme(panel.grid = element_blank()) +
  coord_map() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  ylab("") +
  xlab("")
## Warning: Ignoring unknown parameters: check_overlap
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_label).

# Closing map: the points currently held in filterT, without year labels.
# Axes, grid and legend are hidden.
ggplot(map, aes(x = long, y = lat)) +
  geom_path(aes(group = group)) +
  geom_point(
    data = filterT,
    aes(
      x = `Longitude location of institution`,
      y = `Latitude location of institution`,
      color = student_writer
    )
  ) +
  scale_colour_brewer(palette = "Set1") +
  coord_map() +
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(x = "", y = "")
## Warning: Removed 6 rows containing missing values (geom_point).

# Adds `age` ("old" when year <= 1922, otherwise "new"; NA when year is NA)
# and converts student_writer to a factor with a fixed level order.
# Values of student_writer outside the listed levels become NA.
join_data2 <- join_data2 %>%
  mutate(
    age = if_else(year <= 1922, "old", "new"),
    student_writer = factor(student_writer, levels = c("Yes", "No", "Unknown"))
  )

# Stacked bar chart: number of songs per age group, split by student_writer.
join_data2 %>%
  # Drop rows without an age explicitly; ggplot() has no NA-removal argument.
  filter(!is.na(age)) %>%
  ggplot(aes(x = age, fill = student_writer)) +
  geom_bar() +
  # The mapped aesthetic is `fill`, so the palette must be set on the fill scale.
  scale_fill_brewer(palette = "Set1")

# Song year per speed_cat category: raw points with boxplots drawn on top,
# both coloured by student_writer.
ggplot(
  data = group_by(join_data2, speed_cat),
  mapping = aes(x = speed_cat, y = year, colour = student_writer)
) +
  geom_point() +
  geom_boxplot() +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "Length & Speed")
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).
## Warning: Removed 5 rows containing missing values (geom_point).

# Scatter plot of 2019 football wins against Niche athletic rank, coloured by
# number_fights treated as a discrete variable.
join_data2 %>%
  ggplot(aes(x = Niche_Athletic_Rank, y = X2019_FB_Wins, color = as.factor(number_fights))) +
  geom_point() +
  # "Set1" offers at most 9 colours, fewer than the distinct values of
  # number_fights; the discrete viridis scale has no such limit.
  scale_colour_viridis_d()
## Warning: Removed 3 rows containing missing values (geom_point).

# Scatter plot of Niche athletic rank against tuition and fees, coloured by
# speed_cat.
join_data2 %>%
  # Backtick-quoted column name is looked up in the piped data frame.
  ggplot(aes(x = `Tuition and fees, 2013-14`, y = Niche_Athletic_Rank, color = speed_cat)) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

# Scatter plot of the 4-year bachelor graduation rate against Niche party rank,
# coloured by the `nonsense` column.
join_data2 %>%
  ggplot(aes(
    x = Niche_Party_Rank,
    # Backtick-quoted column name is looked up in the piped data frame.
    y = `Graduation rate - Bachelor degree within 4 years, total`,
    color = nonsense
  )) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

# Boxplots of the share of enrolled women, grouped by the `men` column.
join_data2 %>%
  # Backtick-quoted column name is looked up in the piped data frame.
  ggplot(aes(x = men, y = `Percent of total enrollment that are women`)) +
  geom_boxplot() +
  xlab("Does the song refer to either men or boys?") +
  ylab("Percentage of Students who are Female")
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

# SAT Writing 75th percentile score for each value of `spelling`, one point per
# school, coloured by `spelling`.
join_data2 %>%
  # Backtick-quoted column name is looked up in the piped data frame.
  ggplot(aes(x = spelling, y = `SAT Writing 75th percentile score`, color = spelling)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT 75th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).

# SAT Writing 25th percentile score for each value of `spelling`, one point per
# school, coloured by `spelling`.
join_data2 %>%
  # Backtick-quoted column name is looked up in the piped data frame.
  ggplot(aes(x = spelling, y = `SAT Writing 25th percentile score`, color = spelling)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Does the song spell out words?") +
  ylab("SAT 25th Percentile Score")
## Warning: Removed 28 rows containing missing values (geom_point).

# Scatter plot of Niche party rank against Niche athletic rank, coloured by
# speed_cat.
ggplot(
  data = group_by(join_data2, speed_cat),
  mapping = aes(x = Niche_Athletic_Rank, y = Niche_Party_Rank, colour = speed_cat)
) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "Athletic Ranking", y = "Party Ranking")

# Reshape the columns from `victory` through `spelling` into long form:
# the former column name goes into `lyric`, its value into `score`.
join_data_longer <- pivot_longer(
  join_data2,
  cols = victory:spelling,
  names_to = "lyric",
  values_to = "score"
)

# One panel per `lyric`: boxplots of 2019 football wins for each `score` value.
ggplot(
  data = group_by(join_data_longer, lyric),
  mapping = aes(x = score, y = X2019_FB_Wins)
) +
  geom_boxplot() +
  facet_wrap(vars(lyric))

Henry's part
Matthew's part
# Shorten the 2019 football record column names; the plots that follow use
# `Wins` and `Losses`.
join_data <- rename(join_data, Wins = X2019_FB_Wins, Losses = X2019_FB_Losses)

# Party rank against athletic rank. Point size scales with Wins / 3, shape
# shows student_writer, colour shows conference. Reference lines are drawn at
# athletic rank 50 and party rank 120.
ggplot(join_data, aes(x = Niche_Athletic_Rank, y = Niche_Party_Rank)) +
  geom_point(aes(size = Wins / 3, shape = student_writer, color = conference)) +
  xlim(-10, 110) +
  ylim(-10, 270) +
  geom_vline(xintercept = 50) +
  geom_hline(yintercept = 120) +
  labs(
    title = "Athletic rank vs Party rank",
    x = "Athletic Rank",
    y = "Party Rank"
  )
## Warning: Removed 9 rows containing missing values (geom_point).
# Wins against Losses; colour shows conference, shape shows student_writer,
# point size follows Niche_Party_Rank.
ggplot(join_data, aes(x = Losses, y = Wins)) +
  geom_point(
    # Bare column name: `size` is looked up in the plot's own data.
    aes(color = conference, shape = student_writer, size = Niche_Party_Rank)
  ) +
  ggtitle("Wins vs Losses")
## Warning: Removed 6 rows containing missing values (geom_point).
# Party rank against the ACT composite 25th percentile score; colour shows
# conference, shape shows student_writer, and each point is labelled with the
# school name (overlapping labels are skipped).
ggplot(
  join_data,
  aes(x = `ACT Composite 25th percentile score`, y = Niche_Party_Rank)
) +
  geom_point(aes(color = conference, shape = student_writer)) +
  # `label` is mapped as an aesthetic so it stays aligned with the rows of the
  # plot data instead of relying on an external vector of matching length.
  geom_text(aes(label = school), check_overlap = TRUE) +
  xlab("ACT Composite 25th percentile score") +
  ylab("Party Rank") +
  ggtitle("ACT Composite 25th percentile score vs Party rank")
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_text).
## Among schools with a party rank below 100 the fight-song writer varies, but above 100 the songs were mostly written by non-students. ## The biggest party and athletic schools are from the Big Ten and SEC; the smallest are from the ACC and the Pac-12.
## Schools whose fight song was written by a non-student have a better football record than schools with a student writer. ## The schools with the best football records also have a low party rank.
## Schools with an ACT 25th-percentile composite score between 20 and 26 have the highest party rank. The party rank then increases exponentially once the ACT composite score exceeds 28.

Ann's part (if doing more than cleaning)